## SSM for FE estimates

library(dplyr)
library(lubridate)
library(KFAS)

load("data/bc_May23.RData")

## within transformation
bc <- bc %>%
    select(date, id, h, ff, gap, cpi) %>%
    group_by(date, id) %>%
    mutate(across(c(ff, gap, cpi), ~ . - mean(., na.rm = TRUE)))

## two state variables: beta and gamma
## Parameters: v0, v1, v2,

## create matrices for state-space model
## (see equation (1) in https://cran.r-project.org/web/packages/KFAS/vignettes/KFAS.pdf)
##  y_t = Z_t alpha_t + epsilon_t    Var(epsilon_t) = H_t
##  alpha_t+1 = T_t alpha_t + R_t eta_t    Var(eta_t) = Q_t
##  alpha_t \sim N(a_t, P_1)
## alpha_t = (i*, beta, gamma)

dates <- sort(unique(bc$date))   # time periods/surveys
nobs <- length(dates)  # number of surveys
p <- bc %>% group_by(date) %>% summarise(n=n()) %>% pull(n) %>% max    # maximum number of forecasts (across firms and horizons)
T <- diag(2)    # transition matrix
R <- diag(2)
a1 <- numeric(2)   # diffuse prior on initial observations of state variables
P1 <- matrix(0, 2, 2)
P1inf <- diag(2)
Y <- matrix(NA, nobs, p)           # observation matrix
Z <- array(NA, c(p, 2, nobs))      # coefficient matrices
for (t in 1:nobs) {
    tmp <- bc %>% filter(date == dates[t])  # forecasts in this survey
    pt <- nrow(tmp)  # number of forecasts
    Y[t, 1:pt] <- tmp$ff
    Z[1:pt, 1, t] <- tmp$cpi
    Z[1:pt, 2, t] <- tmp$gap
}

ssm <- function(pars) {
    ## create matrices for state space model
    H <- pars$v0 * diag(p)
    Q <- diag(c(pars$v1, pars$v2))
    SSModel(Y ~ -1 + SSMcustom(Z, T, R, Q, a1, P1, P1inf), H=H)
}
theta2pars <- function(theta)
    pars <- list(v0 = exp(theta[1]),
                 v1 = exp(theta[2]),
                 v2 = exp(theta[3]))
pars2theta <- function(pars)
    log(with(pars, c(v0, v1, v2)))
obj <- function(theta) {
    ## negative log-likelihood
    mod <- ssm(theta2pars(theta))
    negllk <- -KFS(mod)$logLik
    print(negllk)
    negllk
}

## initial values for numerical likelihood optimization
pars0 <- list(v0 = 0.4,
              v1 = 0.002, # 0.01 VAR / 0.1 SD of monthly change in beta
              v2 = 0.002) # 0.01 VAR / 0.1 SD of monthly change in gamma
theta0 <- pars2theta(pars0)

## optimization
(res1 <- optim(theta0, obj, gr=NULL, method="L-BFGS-B", control = list(trace = 2)))
(res2 <- optim(theta0, obj, gr=NULL, method="Nelder-Mead", control = list(trace = 2)))
print(res1)
print(res2)
res <- res1

theta_hat <- res$par
pars <- theta2pars(theta_hat)
tbl <- cbind(unlist(pars0), unlist(pars))
rownames(tbl) <- c("meas.error.var", "beta innov var", "gamma innov var")
colnames(tbl) <- c("Starting values", "Maximum likelihood estimate")
print(tbl)

## Kalman filter and smoother
mod <- ssm(pars)
kf <- KFS(mod)
cat("LLK:", kf$logLik, "\n")
X <- as.matrix(kf$alphahat) # smoothed states
V <- kf$V
SEs <- t(apply(V, 3, function(X) sqrt(diag(X))))

data <- bc %>% ungroup %>% select(date) %>% distinct
data$beta <- X[,1]
data$beta_lb <- X[,1] - 1.96*SEs[,1]
data$beta_ub <- X[,1] + 1.96*SEs[,1]
data$gamma <- X[,2]
data$gamma_lb <- X[,2] - 1.96*SEs[,2]
data$gamma_ub <- X[,2] + 1.96*SEs[,2]

save(data, file="output/ssm_fe.RData")
